/*
 *   Copyright (c) 2018. 刘路 All rights reserved
 *   版权所有 刘路 并保留所有权利 2018.
 *   ===============================================================
 *   这不是一个自由软件！您只能在不用于商业目的的前提下对程序代码进行修改和
 *   使用。不允许对程序代码以任何形式任何目的的再发布。如果项目发布携带作者
 *   认可的特殊 LICENSE 则按照 LICENSE 执行，废除上面内容。请保留原作者信息。
 *   ================================================================
 *   刘路（feedback@zhoyq.com）于 2018. 创建
 *   http://zhoyq.com
 */

package com.zhoyq.helper.text;

import java.io.File;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;

/**
 * Detects, extracts and masks sensitive words in text.
 *
 * <p>Matching walks the nested-map dictionary exposed by
 * {@code SensitiveWordReader.sensitiveWords}: every level is keyed by a
 * one-character string, and a node whose {@code "isEnd"} entry equals
 * {@code "1"} marks the end of a dictionary word.
 *
 * @author 刘路
 */
public class SensitiveWordFilter {
    /**
     * Minimum-match rule: stop at the first (shortest) dictionary word found
     * at a position. The misspelled identifier is kept for backward
     * compatibility; {@link #minMatchType} is an equivalent alias.
     */
    public final static int minMatchTYpe = 1;
    /**
     * Correctly spelled alias of {@link #minMatchTYpe}.
     */
    public final static int minMatchType = minMatchTYpe;
    /**
     * Maximum-match rule: keep scanning and report the longest dictionary
     * word found at a position.
     */
    public final static int maxMatchType = 2;

    /**
     * Shortest match that is reported as a word; single-character hits are ignored.
     */
    private final static int MIN_WORD_LENGTH = 2;

    /**
     * Bundled word lists, resolved relative to this class's package.
     */
    private final static String[] DEFAULT_RESOURCES = {
            "sensitive/ad.md",
            "sensitive/dirty.md",
            "sensitive/drug.md",
            "sensitive/forum.md",
            "sensitive/illegal.md",
            "sensitive/leader.md",
            "sensitive/news.md",
            "sensitive/other.md",
            "sensitive/politics.md",
            "sensitive/porn.md",
            "sensitive/study.md",
            "sensitive/superstition.md",
            "sensitive/url.md",
            "sensitive/violence.md",
            "sensitive/web.md"
    };

    /**
     * Passes each of the given word-list files to {@code SensitiveWordReader.add}.
     *
     * @param file word-list files to register
     */
    public static void init(File ... file){
        for(File f:file){
            SensitiveWordReader.add(f);
        }
    }

    /**
     * Registers the word lists bundled next to this class.
     *
     * @throws NullPointerException if a bundled resource cannot be found on the class path
     */
    public static void initDefault(){
        File[] files = new File[DEFAULT_RESOURCES.length];
        for(int i = 0 ; i < DEFAULT_RESOURCES.length ; i++){
            files[i] = resourceFile(DEFAULT_RESOURCES[i]);
        }
        init(files);
    }

    /**
     * Resolves a class-path resource to a {@link File}.
     */
    private static File resourceFile(String name){
        java.net.URL url = SensitiveWordFilter.class.getResource(name);
        if(url == null){
            // Same exception type as the former implicit failure, but with a usable message.
            throw new NullPointerException("sensitive word resource not found: " + name);
        }
        try {
            // URL.getPath() is percent-encoded ("%20" for a space); going through
            // the URI yields the decoded file-system path.
            return new File(url.toURI());
        } catch (java.net.URISyntaxException e) {
            return new File(url.getPath());
        } catch (IllegalArgumentException e) {
            // Not a plain "file:" URL (e.g. a resource inside a JAR): keep the legacy behaviour.
            return new File(url.getPath());
        }
    }

    /**
     * Tells whether the text contains at least one sensitive word.
     *
     * @param words     text to inspect; {@code null} is treated as containing nothing
     * @param matchType {@link #minMatchTYpe} or {@link #maxMatchType}
     * @return {@code true} if a sensitive word starts at any position of the text
     */
    public static boolean test(String words,int matchType){
        if(words == null){
            return false;
        }
        for(int i = 0 ; i < words.length() ; i++){
            if(check(words, i, matchType) > 0){
                return true;
            }
        }
        return false;
    }

    /**
     * Same as {@link #test(String, int)} using the minimum-match rule.
     *
     * @param words text to inspect
     * @return {@code true} if the text contains a sensitive word
     */
    public static boolean test(String words){
        return test(words,minMatchTYpe);
    }

    /**
     * Measures the sensitive word that starts exactly at {@code begin}.
     *
     * @param words     text to inspect; {@code null} yields {@code 0}
     * @param begin     index of the first character to match
     * @param matchType {@link #minMatchTYpe} to stop at the shortest word, any
     *                  other value (normally {@link #maxMatchType}) to take the longest
     * @return length in characters of the matched word, or {@code 0} if no
     *         word of at least two characters starts at {@code begin}
     */
    public static int check(String words,int begin,int matchType){
        Map<?, ?> node = SensitiveWordReader.sensitiveWords;
        // NOTE(review): the dictionary is presumably unset until init()/initDefault()
        // has run; treat that as "nothing matches" rather than failing.
        if(words == null || node == null){
            return 0;
        }
        // Number of characters matched along the current dictionary path.
        int depth = 0;
        // Depth at the most recent accepted word end; this is the value returned.
        int matchedLength = 0;
        for(int i = begin; i < words.length() ; i++){
            node = (Map<?, ?>) node.get(String.valueOf(words.charAt(i)));
            if(node == null){
                // The path ends here; whatever word was completed earlier stands.
                break;
            }
            depth++;
            // Word ends shorter than the minimum are skipped, so a one-character
            // entry cannot hide a longer word sharing its prefix.
            if(depth >= MIN_WORD_LENGTH && "1".equals(node.get("isEnd"))){
                matchedLength = depth;
                if(minMatchTYpe == matchType){
                    break;
                }
            }
        }
        // Characters matched after the last word end are only a partial prefix of
        // some longer word and must not be counted.
        return matchedLength;
    }

    /**
     * Same as {@link #check(String, int, int)} using the minimum-match rule.
     *
     * @param words text to inspect
     * @param begin index of the first character to match
     * @return length of the matched word, or {@code 0}
     */
    public static int check(String words,int begin){
        return check(words,begin,minMatchTYpe);
    }

    /**
     * Collects the distinct sensitive words occurring in the text.
     *
     * @param words     text to scan; {@code null} yields an empty set
     * @param matchType {@link #minMatchTYpe} or {@link #maxMatchType}
     * @return the sensitive words found; never {@code null}
     */
    public static Set<String> find(String words, int matchType){
        Set<String> found = new HashSet<String>();
        if(words == null){
            return found;
        }
        int i = 0;
        while(i < words.length()){
            int length = check(words, i, matchType);
            if(length > 0){
                found.add(words.substring(i, i + length));
                // Resume right after the matched word.
                i += length;
            } else {
                i++;
            }
        }
        return found;
    }

    /**
     * Same as {@link #find(String, int)} using the minimum-match rule.
     *
     * @param words text to scan
     * @return the sensitive words found
     */
    public static Set<String> find(String words){
        return find(words,minMatchTYpe);
    }

    /**
     * Masks every sensitive word in the text, repeating {@code replaceChar}
     * once per character of the word.
     *
     * @param words       text to mask; {@code null} is returned unchanged
     * @param replaceChar string substituted for each character of a sensitive word
     * @param matchType   {@link #minMatchTYpe} or {@link #maxMatchType}
     * @return the masked text
     */
    public static String replace(String words,String replaceChar,int matchType){
        if(words == null){
            return null;
        }
        String resultTxt = words;
        for(String word : find(words, matchType)){
            // Literal replacement: neither the word nor the mask may be read as a
            // regular expression (words may contain '.', '+', '?'; masks may contain '$').
            resultTxt = resultTxt.replace(word, getReplaceChars(replaceChar, word.length()));
        }
        return resultTxt;
    }

    /**
     * Same as {@link #replace(String, String, int)} using the minimum-match rule.
     *
     * @param words       text to mask
     * @param replaceChar string substituted for each character of a sensitive word
     * @return the masked text
     */
    public static String replace(String words,String replaceChar ){
        return replace(words,replaceChar,minMatchTYpe);
    }

    /**
     * Builds the mask for a word: {@code replaceChar} repeated {@code length}
     * times, and at least once.
     */
    private static String getReplaceChars(String replaceChar,int length){
        StringBuilder mask = new StringBuilder();
        mask.append(replaceChar);
        for(int i = 1 ; i < length ; i++){
            mask.append(replaceChar);
        }
        return mask.toString();
    }

}
